rm(list = ls())
load("members.RData")

save(list = ls(all = TRUE), file= "members.RData")

getwd()

rm(list = ls())
rm()


install.packages("")


library(readxl)
library(XML)
library(tidyr)
library(stringi)
library(tidyverse) 
library(tidygraph)
library(networkD3)
library(ineq)
library(xlsx)





#################################
########## IMPORT ###############
#################################

# kvrk
kvrk_2018 <- read_excel("input/KV2018reg20181008_xlsx/kvrk.xlsx")
kvrk_2014 <- read_excel("input/KV2014reg20141014_xlsx/kvrk.xlsx")
kvrk_2010 <- read_excel("input/ZO2010/KV10-RK.xlsx")
kvrk_2006 <- read_excel("input/ZO2006/KV06-RK.xlsx")
kvrk_2002 <- read_excel("input/RKKV2002.xlsx")
kvrk_1998 <- read_excel("input/RKKV1998.xlsx")
kvrk_1994 <- read_excel("input/RKKV1994.xlsx")

# kvrzcoco
kvrzcoco_2018 <- read_excel("input/KV2018reg20181008_xlsx/kvrzcoco.xlsx")
kvrzcoco_2014 <- read_excel("input/KV2014reg20141014_xlsx/kvrzcoco.xlsx")
kvrzcoco_2010 <- read_excel("input/ZO2010/KV10-RZ.xlsx")
kvrzcoco_2006 <- read_excel("input/ZO2010/KV10-RZ.xlsx")

# cpp
cpp_2018 <- read_excel("input/KV2018ciselniky20181004/cpp.xlsx")
cpp_2014 <- xmlParse("input/KV2014ciselniky20141008/cpp.xml")
cpp_2014 <- xmlToDataFrame(cpp_2014)
cpp_2010 <- xmlParse("input/KV2010ciselniky20140903/cpp.xml")
cpp_2010 <- xmlToDataFrame(cpp_2010)
cpp_2006 <- xmlParse("input/KV2006ciselniky20140909/cpp.xml")
cpp_2006 <- xmlToDataFrame(cpp_2006)
cpp_2002 <- read_excel("input/cpp_2002.xlsx")
cpp_1998 <- read_excel("input/cpp_1998.xlsx")
cpp_1994 <- read_excel("input/cpp_1994.xlsx")

# cnumnuts
cnumnuts_2018 <- read_excel("input/KV2018ciselniky20181004/cnumnuts.xlsx")
cnumnuts_old <- read_excel("input/okresy_old.xls")

# cnumnuts inhabitants
inhab <- read_excel("input/okresy.xlsx")

# party members
members <- read_excel("input/members.xlsx", na = "NA")

# population
pop_2018 <- read_excel("input/population/2018.xlsx")
pop_2014 <- read_excel("input/population/2014.xlsx")
pop_2010 <- read_excel("input/population/2010.xls")
pop_2006 <- read_excel("input/population/2006.xls")


#################################
########## ADJUST ###############
#################################

# population
pop_2018 <- pop_2018[7:nrow(pop_2018),c(2,3,4)]
colnames(pop_2018) <- c("OBEC","NAZEVOBCE","POPULATION")
pop_2018 <- pop_2018[!is.na(pop_2018$POPULATION),]
pop_2018$POPULATION <- as.numeric(pop_2018$POPULATION)
pop_2018$OBEC <- as.numeric(pop_2018$OBEC)

pop_2014 <- pop_2014[7:nrow(pop_2014),c(2,3,4)]
colnames(pop_2014) <- c("OBEC","NAZEVOBCE","POPULATION")
pop_2014 <- pop_2014[!is.na(pop_2014$POPULATION),]
pop_2014$POPULATION <- as.numeric(pop_2014$POPULATION)
pop_2014$OBEC <- as.numeric(pop_2014$OBEC)

pop_2010 <- pop_2010[7:nrow(pop_2010),c(2,3,4)]
colnames(pop_2010) <- c("OBEC","NAZEVOBCE","POPULATION")
pop_2010 <- pop_2010[!is.na(pop_2010$POPULATION),]
pop_2010$POPULATION <- as.numeric(pop_2010$POPULATION)
pop_2010$OBEC <- as.numeric(pop_2010$OBEC)

pop_2006 <- pop_2006[7:nrow(pop_2006),c(2,3,4)]
colnames(pop_2006) <- c("OBEC","NAZEVOBCE","POPULATION")
pop_2006 <- pop_2006[!is.na(pop_2006$POPULATION),]
pop_2006$POPULATION <- as.numeric(pop_2006$POPULATION)
pop_2006$OBEC <- as.numeric(pop_2006$OBEC)

# separation of degrees in 1994
kvrk_1994 <- separate(data = kvrk_1994, col = PRIJMENI, into = c("PRIJMENI", "TITUL1", "TITUL2", "TITUL3"), sep = "\\ ")
kvrk_1994$TITUL1 <- ifelse(kvrk_1994$TITUL1=="-",NA,kvrk_1994$TITUL1)
kvrk_1994$TITUL1 <- ifelse(kvrk_1994$TITUL1==",",NA,kvrk_1994$TITUL1)
kvrk_1994$TITUL1 <- ifelse(kvrk_1994$TITUL1==".",NA,kvrk_1994$TITUL1)
kvrk_1994$PRIJMENI <- ifelse(kvrk_1994$PRIJMENI=="De",paste(kvrk_1994$PRIJMENI,kvrk_1994$TITUL1),kvrk_1994$PRIJMENI)
kvrk_1994$TITUL1 <- ifelse(substr(kvrk_1994$PRIJMENI,1,3)=="De ",NA,kvrk_1994$TITUL1)
surnames <- c("Mierna","Favero","Mierny","Kucej","Zahrádský","Záhradský","Maliki","Karibová","Abdel","Vuurenová",
              "st.","ml.","st.,","ml.,","st","ml","(ml.)")
for (i in 1:length(surnames)) {
  kvrk_1994$PRIJMENI[!is.na(kvrk_1994$TITUL1)] <- ifelse(kvrk_1994$TITUL1[!is.na(kvrk_1994$TITUL1)]==surnames[i],paste(kvrk_1994$PRIJMENI[!is.na(kvrk_1994$TITUL1)],kvrk_1994$TITUL1[!is.na(kvrk_1994$TITUL1)]),kvrk_1994$PRIJMENI[!is.na(kvrk_1994$TITUL1)])
  kvrk_1994$TITUL1[!is.na(kvrk_1994$TITUL1)] <- ifelse(kvrk_1994$TITUL1[!is.na(kvrk_1994$TITUL1)]==surnames[i],NA,kvrk_1994$TITUL1[!is.na(kvrk_1994$TITUL1)])
}
surnames <- c("Živná","Wahab")
for (i in 1:length(surnames)) {
  kvrk_1994$PRIJMENI[!is.na(kvrk_1994$TITUL2)] <- ifelse(kvrk_1994$TITUL2[!is.na(kvrk_1994$TITUL2)]==surnames[i],paste(kvrk_1994$PRIJMENI[!is.na(kvrk_1994$TITUL2)],kvrk_1994$TITUL2[!is.na(kvrk_1994$TITUL2)]),kvrk_1994$PRIJMENI[!is.na(kvrk_1994$TITUL2)])
  kvrk_1994$TITUL2[!is.na(kvrk_1994$TITUL2)] <- ifelse(kvrk_1994$TITUL2[!is.na(kvrk_1994$TITUL2)]==surnames[i],NA,kvrk_1994$TITUL2[!is.na(kvrk_1994$TITUL2)])
}
kvrk_1994$PRIJMENI <- ifelse(substr(kvrk_1994$PRIJMENI,nchar(kvrk_1994$PRIJMENI),nchar(kvrk_1994$PRIJMENI))==","
                                    ,substr(kvrk_1994$PRIJMENI,1,nchar(kvrk_1994$PRIJMENI)-1)
                                    ,kvrk_1994$PRIJMENI)
kvrk_1994$PRIJMENI <- ifelse(kvrk_1994$JMENO=="Martin" & kvrk_1994$PRIJMENI=="Weis",
                             paste(kvrk_1994$PRIJMENI,kvrk_1994$TITUL1,kvrk_1994$TITUL2,kvrk_1994$TITUL3),kvrk_1994$PRIJMENI)
kvrk_1994$TITUL1 <- ifelse(kvrk_1994$PRIJMENI=="Weis Th Lic P.",NA,kvrk_1994$TITUL1)
kvrk_1994$TITUL2 <- ifelse(kvrk_1994$PRIJMENI=="Weis Th Lic P.",NA,kvrk_1994$TITUL2)
kvrk_1994$TITUL3 <- ifelse(kvrk_1994$PRIJMENI=="Weis Th Lic P.",NA,kvrk_1994$TITUL3)
kvrk_1994 <- separate(data = kvrk_1994, col = PRIJMENI, into = c("PRIJMENI","TITUL4"), sep = "\\,")
kvrk_1994 <- separate(data = kvrk_1994, col = JMENO, into = c("TITUL5", "JMENO"), sep = "\\. ", fill = "left")
kvrk_1994 <- separate(data = kvrk_1994, col = JMENO, into = c("TITUL6","TITUL7","JMENO"), sep = "\\.", fill = "left")
kvrk_1994 <- separate(data = kvrk_1994, col = JMENO, into = c("TITUL8", "JMENO"), sep = "\\ ", fill = "left")
names <- kvrk_1994$TITUL7[kvrk_1994$JMENO==""]
kvrk_1994$TITUL7 <- ifelse(kvrk_1994$JMENO=="",NA,kvrk_1994$TITUL7)
kvrk_1994$JMENO[kvrk_1994$JMENO==""] <- names
surnames <- c(" ml."," st.","ml.","st.")
for (i in 1:length(surnames)) {
  kvrk_1994$PRIJMENI[!is.na(kvrk_1994$TITUL4)] <- ifelse(kvrk_1994$TITUL4[!is.na(kvrk_1994$TITUL4)]==surnames[i],paste(kvrk_1994$PRIJMENI[!is.na(kvrk_1994$TITUL4)],kvrk_1994$TITUL4[!is.na(kvrk_1994$TITUL4)]),kvrk_1994$PRIJMENI[!is.na(kvrk_1994$TITUL4)])
  kvrk_1994$TITUL4[!is.na(kvrk_1994$TITUL4)] <- ifelse(kvrk_1994$TITUL4[!is.na(kvrk_1994$TITUL4)]==surnames[i],NA,kvrk_1994$TITUL4[!is.na(kvrk_1994$TITUL4)])
}
surnames <- c("Bohumil","Jan","Ladislav","Luisa","Maří","Máří","Štefan","Vít")
for (i in 1:length(surnames)) {
  kvrk_1994$JMENO[!is.na(kvrk_1994$TITUL8)] <- ifelse(kvrk_1994$TITUL8[!is.na(kvrk_1994$TITUL8)]==surnames[i],paste(kvrk_1994$TITUL8[!is.na(kvrk_1994$TITUL8)],kvrk_1994$JMENO[!is.na(kvrk_1994$TITUL8)]),kvrk_1994$JMENO[!is.na(kvrk_1994$TITUL8)])
  kvrk_1994$TITUL8[!is.na(kvrk_1994$TITUL8)] <- ifelse(kvrk_1994$TITUL8[!is.na(kvrk_1994$TITUL8)]==surnames[i],NA,kvrk_1994$TITUL8[!is.na(kvrk_1994$TITUL8)])
}
rm(names,surnames,i)
kvrk_1994$TITUL1 <- ifelse(is.na(kvrk_1994$TITUL1),"",kvrk_1994$TITUL1)
kvrk_1994$TITUL2 <- ifelse(is.na(kvrk_1994$TITUL2),"",kvrk_1994$TITUL2)
kvrk_1994$TITUL3 <- ifelse(is.na(kvrk_1994$TITUL3),"",kvrk_1994$TITUL3)
kvrk_1994$TITUL4 <- ifelse(is.na(kvrk_1994$TITUL4),"",kvrk_1994$TITUL4)
kvrk_1994$TITUL5 <- ifelse(is.na(kvrk_1994$TITUL5),"",kvrk_1994$TITUL5)
kvrk_1994$TITUL6 <- ifelse(is.na(kvrk_1994$TITUL6),"",kvrk_1994$TITUL6)
kvrk_1994$TITUL7 <- ifelse(is.na(kvrk_1994$TITUL7),"",kvrk_1994$TITUL7)
kvrk_1994$TITUL8 <- ifelse(is.na(kvrk_1994$TITUL8),"",kvrk_1994$TITUL8)
kvrk_1994$TITUL <- paste0(kvrk_1994$TITUL1,kvrk_1994$TITUL2,kvrk_1994$TITUL3,kvrk_1994$TITUL4,
                         kvrk_1994$TITUL5,kvrk_1994$TITUL6,kvrk_1994$TITUL7,kvrk_1994$TITUL8)
kvrk_1994$TITUL <- ifelse(kvrk_1994$TITUL=="",NA,kvrk_1994$TITUL)
kvrk_1994 <- kvrk_1994[ , -which(names(kvrk_1994) %in% c("TITUL1","TITUL2","TITUL3","TITUL4","TITUL5","TITUL6","TITUL7","TITUL8"))]


# solving problems with university degrees
kvrk_1998$TITUL <- ifelse(kvrk_1998$TITUL %in% c("19111951","260808463","441118064","6712121977","6911192068"),NA,kvrk_1998$TITUL)
kvrk_2002$TITULPRED <- ifelse(kvrk_2002$TITULPRED==".",NA,kvrk_2002$TITULPRED)
kvrk_2002$TITULZA <- ifelse(kvrk_2002$TITULZA %in% c(".","¨",","),NA,kvrk_2002$TITULZA)

kvrk_2002$PRIJMENI <- ifelse(kvrk_2002$TITULZA %in% c("ml."),paste(kvrk_2002$PRIJMENI,kvrk_2002$TITULZA),kvrk_2002$PRIJMENI)
kvrk_2002$TITULZA <- ifelse(kvrk_2002$TITULZA %in% c("ml."),NA,kvrk_2002$TITULZA)
kvrk_2002$PRIJMENI <- ifelse(kvrk_2002$TITULZA %in% c("st."),paste(kvrk_2002$PRIJMENI,kvrk_2002$TITULZA),kvrk_2002$PRIJMENI)
kvrk_2002$TITULZA <- ifelse(kvrk_2002$TITULZA %in% c("st."),NA,kvrk_2002$TITULZA)
kvrk_2002$TITULPRED <- ifelse(kvrk_2002$TITULZA %in% c("Sršňová","Lebedová","Procházka","Černý","Jebavá","Kohout","Pešík","Buchner"),
                              kvrk_2002$JMENO,kvrk_2002$TITULPRED)
kvrk_2002$JMENO <- ifelse(kvrk_2002$TITULZA %in% c("Sršňová","Lebedová","Procházka","Černý","Jebavá","Kohout","Pešík","Buchner"),
                              kvrk_2002$PRIJMENI,kvrk_2002$JMENO)
kvrk_2002$PRIJMENI <- ifelse(kvrk_2002$TITULZA %in% c("Sršňová","Lebedová","Procházka","Černý","Jebavá","Kohout","Pešík","Buchner"),
                          kvrk_2002$TITULZA,kvrk_2002$PRIJMENI)
kvrk_2002$TITULZA <- ifelse(kvrk_2002$TITULZA %in% c("Sršňová","Lebedová","Procházka","Černý","Jebavá","Kohout","Pešík","Buchner"),
                             NA,kvrk_2002$TITULZA)


# coding university education
kvrk_1994$VS <- ifelse(is.na(kvrk_1994$TITUL),0,1)
kvrk_1998$VS <- ifelse(is.na(kvrk_1998$TITUL),0,1)
kvrk_2002$VS <- ifelse(is.na(kvrk_2002$TITULPRED) & is.na(kvrk_2002$TITULZA),0,1)
kvrk_2006$VS <- ifelse(is.na(kvrk_2006$TITULPRED) & is.na(kvrk_2006$TITULZA),0,1)
kvrk_2010$VS <- ifelse(is.na(kvrk_2010$TITULPRED) & is.na(kvrk_2010$TITULZA),0,1)
kvrk_2014$VS <- ifelse(is.na(kvrk_2014$TITULPRED) & is.na(kvrk_2014$TITULZA),0,1)
kvrk_2018$VS <- ifelse(is.na(kvrk_2018$TITULPRED) & is.na(kvrk_2018$TITULZA),0,1)


# coding gender
kvrk_1994$ZENA <- ifelse(stri_sub(kvrk_1994$PRIJMENI,-1,-1)=="á",1,0)
kvrk_1998$ZENA <- ifelse(stri_sub(kvrk_1998$PRIJMENI,-1,-1)=="á",1,0)
kvrk_2002$ZENA <- ifelse(stri_sub(kvrk_2002$PRIJMENI,-1,-1)=="á",1,0)
kvrk_2006$ZENA <- ifelse(stri_sub(kvrk_2006$PRIJMENI,-1,-1)=="á",1,0)
kvrk_2010$ZENA <- ifelse(stri_sub(kvrk_2010$PRIJMENI,-1,-1)=="á",1,0)
kvrk_2014$ZENA <- ifelse(stri_sub(kvrk_2014$PRIJMENI,-1,-1)=="á",1,0)
kvrk_2018$ZENA <- ifelse(stri_sub(kvrk_2018$PRIJMENI,-1,-1)=="á",1,0)


# codes of Czech regions
cnumnuts_2018 <- cnumnuts_2018[,1:3]
cnumnuts_2018 <- cnumnuts_2018[!is.na(cnumnuts_2018$NUTS),]
cnumnuts_2018 <- cnumnuts_2018[nchar(cnumnuts_2018$NUTS)>=4,]
cnumnuts <- cnumnuts_2018[nchar(cnumnuts_2018$NUTS)>=6,]
colnames(cnumnuts) <- c("NUMNUTS","NUTSOKRES","NAZEVNUTSOKRES")
cnumnuts$NUTSKRAJ <- substr(cnumnuts$NUTSOKRES,1,5)
cnumnuts <- merge(cnumnuts,cnumnuts_2018[,c("NUTS","NAZEVNUTS")],by.x = "NUTSKRAJ", by.y = "NUTS")
colnames(cnumnuts)[5] <- "NAZEVNUTSKRAJ"
cnumnuts$NUTSREG <- substr(cnumnuts$NUTSOKRES,1,4)
cnumnuts <- merge(cnumnuts,cnumnuts_2018[,c("NUTS","NAZEVNUTS")],by.x = "NUTSREG", by.y = "NUTS")
colnames(cnumnuts)[7] <- "NAZEVNUTSREG"
cnumnuts_2018 <- cnumnuts
cnumnuts_2018 <- merge(cnumnuts_2018,cnumnuts_old[,c("NAZEVNUTS","OKRES_OLD")],by.x = "NAZEVNUTSOKRES", by.y = "NAZEVNUTS")
rm(cnumnuts,cnumnuts_old)
cnumnuts_2018$NUMNUTS[cnumnuts_2018$NAZEVNUTSKRAJ=="Hlavní město Praha"] <- 1100

# codes of municipalities
kvrk_2018 <- merge(kvrk_2018,cnumnuts_2018[,c("NUMNUTS","NUTSOKRES","NUTSKRAJ","NUTSREG","NAZEVNUTSOKRES","NAZEVNUTSKRAJ","NAZEVNUTSREG")],
                   by.x = "OKRES", by.y = "NUMNUTS")
kvrk_2014 <- merge(kvrk_2014,cnumnuts_2018[,c("NUMNUTS","NUTSOKRES","NUTSKRAJ","NUTSREG","NAZEVNUTSOKRES","NAZEVNUTSKRAJ","NAZEVNUTSREG")],
                   by.x = "OKRES", by.y = "NUMNUTS")
kvrk_2010 <- merge(kvrk_2010,cnumnuts_2018[,c("NUMNUTS","NUTSOKRES","NUTSKRAJ","NUTSREG","NAZEVNUTSOKRES","NAZEVNUTSKRAJ","NAZEVNUTSREG")],
                   by.x = "OKRES", by.y = "NUMNUTS")
kvrk_2006 <- merge(kvrk_2006,cnumnuts_2018[,c("NUMNUTS","NUTSOKRES","NUTSKRAJ","NUTSREG","NAZEVNUTSOKRES","NAZEVNUTSKRAJ","NAZEVNUTSREG")],
                   by.x = "OKRES", by.y = "NUMNUTS")
kvrk_2002 <- merge(kvrk_2002,cnumnuts_2018[,c("NUMNUTS","NUTSOKRES","NUTSKRAJ","NUTSREG","NAZEVNUTSOKRES","NAZEVNUTSKRAJ","NAZEVNUTSREG")],
                   by.x = "OKRES", by.y = "NUMNUTS")
kvrk_1998 <- merge(kvrk_1998,cnumnuts_2018[,c("OKRES_OLD","NUTSOKRES","NUTSKRAJ","NUTSREG","NAZEVNUTSOKRES","NAZEVNUTSKRAJ","NAZEVNUTSREG")],
                   by.x = "OKRES_OLD", by.y = "OKRES_OLD")
kvrk_1994 <- merge(kvrk_1994,cnumnuts_2018[,c("OKRES_OLD","NUTSOKRES","NUTSKRAJ","NUTSREG","NAZEVNUTSOKRES","NAZEVNUTSKRAJ","NAZEVNUTSREG")],
                   by.x = "OKRES_OLD", by.y = "OKRES_OLD")


# type of municipality
kvrk_2018 <- merge(kvrk_2018,kvrzcoco_2018[!duplicated(kvrzcoco_2018$KODZASTUP),c("KODZASTUP","TYPZASTUP")],by = "KODZASTUP")
kvrk_2014 <- merge(kvrk_2014,kvrzcoco_2014[!duplicated(kvrzcoco_2014$KODZASTUP),c("KODZASTUP","TYPZASTUP")],by = "KODZASTUP")
kvrk_2010 <- merge(kvrk_2010,kvrzcoco_2010[!duplicated(kvrzcoco_2010$KODZASTUP),c("KODZASTUP","TYPZASTUP")],by = "KODZASTUP")
kvrk_2006 <- merge(kvrk_2006,kvrzcoco_2006[!duplicated(kvrzcoco_2006$KODZASTUP),c("KODZASTUP","TYPZASTUP")],by = "KODZASTUP")


# exclude duplicated candidates from a municipality level
kvrk_2018$DUPLICATE <- paste(kvrk_2018$JMENO,kvrk_2018$PRIJMENI,kvrk_2018$VEK,kvrk_2018$PSTRANA,kvrk_2018$OKRES,
                             kvrk_2018$KRAJ, sep = ",")
kvrk_2018 <- kvrk_2018[order(-kvrk_2018$TYPZASTUP),] 
kvrk_2018 <- kvrk_2018[!duplicated(kvrk_2018$DUPLICATE),]
kvrk_2014$DUPLICATE <- paste(kvrk_2014$JMENO,kvrk_2014$PRIJMENI,kvrk_2014$VEK,kvrk_2014$PSTRANA,kvrk_2014$OKRES,
                             kvrk_2014$KRAJ, sep = ",")
kvrk_2014 <- kvrk_2014[order(-kvrk_2014$TYPZASTUP),] 
kvrk_2014 <- kvrk_2014[!duplicated(kvrk_2014$DUPLICATE),]
kvrk_2010$DUPLICATE <- paste(kvrk_2010$JMENO,kvrk_2010$PRIJMENI,kvrk_2010$VEK,kvrk_2010$PSTRANA,kvrk_2010$OKRES,
                             kvrk_2010$KRAJ, sep = ",")
kvrk_2010 <- kvrk_2010[order(-kvrk_2010$TYPZASTUP),] 
kvrk_2010 <- kvrk_2010[!duplicated(kvrk_2010$DUPLICATE),]
kvrk_2006$DUPLICATE <- paste(kvrk_2006$JMENO,kvrk_2006$PRIJMENI,kvrk_2006$VEK,kvrk_2006$PSTRANA,kvrk_2006$OKRES,
                             kvrk_2006$KRAJ, sep = ",")
kvrk_2006 <- kvrk_2006[order(-kvrk_2006$TYPZASTUP),] 
kvrk_2006 <- kvrk_2006[!duplicated(kvrk_2006$DUPLICATE),]
kvrk_2002$DUPLICATE <- paste(kvrk_2002$JMENO,kvrk_2002$PRIJMENI,kvrk_2002$VEK,kvrk_2002$PSTRANA,kvrk_2002$OKRES,
                             kvrk_2002$KRAJ, sep = ",")
kvrk_2002 <- kvrk_2002[order(-kvrk_2002$TYPZASTUP),] 
kvrk_2002 <- kvrk_2002[!duplicated(kvrk_2002$DUPLICATE),]
kvrk_1998$DUPLICATE <- paste(kvrk_1998$JMENO,kvrk_1998$PRIJMENI,kvrk_1998$VEK,kvrk_1998$PSTRANA,kvrk_1998$OKRES,
                             kvrk_1998$KRAJ, sep = ",")
kvrk_1998 <- kvrk_1998[order(-kvrk_1998$TYPZASTUP),] 
kvrk_1998 <- kvrk_1998[!duplicated(kvrk_1998$DUPLICATE),]
kvrk_1994$DUPLICATE <- paste(kvrk_1994$JMENO,kvrk_1994$PRIJMENI,kvrk_1994$VEK,kvrk_1994$PSTRANA,kvrk_1994$OKRES,
                             kvrk_1994$KRAJ, sep = ",")
kvrk_1994 <- kvrk_1994[order(-kvrk_1994$TYPZASTUP),] 
kvrk_1994 <- kvrk_1994[!duplicated(kvrk_1994$DUPLICATE),]


# identification of candidates
kvrk_2018$IDENTITY <- paste(kvrk_2018$JMENO,kvrk_2018$PRIJMENI,kvrk_2018$VEK,kvrk_2018$KODZASTUP, sep = ",")
kvrk_2014$IDENTITY <- paste(kvrk_2014$JMENO,kvrk_2014$PRIJMENI,(kvrk_2014$VEK+4),kvrk_2014$KODZASTUP, sep = ",")
kvrk_2010$IDENTITY <- paste(kvrk_2010$JMENO,kvrk_2010$PRIJMENI,(kvrk_2010$VEK+8),kvrk_2010$KODZASTUP, sep = ",")
kvrk_2006$IDENTITY <- paste(kvrk_2006$JMENO,kvrk_2006$PRIJMENI,(kvrk_2006$VEK+12),kvrk_2006$KODZASTUP, sep = ",")
kvrk_2002$IDENTITY <- paste(kvrk_2002$JMENO,kvrk_2002$PRIJMENI,(kvrk_2002$VEK+16),kvrk_2002$KODZASTUP, sep = ",")
kvrk_1998$IDENTITY <- paste(kvrk_1998$JMENO,kvrk_1998$PRIJMENI,(kvrk_1998$VEK+20),kvrk_1998$KODZASTUP, sep = ",")
kvrk_1994$IDENTITY <- paste(kvrk_1994$JMENO,kvrk_1994$PRIJMENI,(kvrk_1994$VEK+24),kvrk_1994$KODZASTUP, sep = ",")


# names of the parties
cpp_2002$ZKRATKAP30 <- NA
cpp_2002$ZKRATKAP8 <- NA
cpp_1998$ZKRATKAP30 <- NA
cpp_1998$ZKRATKAP8 <- NA
cpp_1994$ZKRATKAP30 <- NA
cpp_1994$ZKRATKAP8 <- NA
cpp <- rbind(cpp_2018,cpp_2014,cpp_2010,cpp_2006,cpp_2002,cpp_1998,cpp_1994)
cpp <- cpp[!duplicated(cpp$PSTRANA), ]
kvrk_2018 <- merge(kvrk_2018,cpp,by = "PSTRANA")
kvrk_2014 <- merge(kvrk_2014,cpp,by = "PSTRANA")
kvrk_2010 <- merge(kvrk_2010,cpp,by = "PSTRANA")
kvrk_2006 <- merge(kvrk_2006,cpp,by = "PSTRANA")
kvrk_2002 <- merge(kvrk_2002,cpp,by = "PSTRANA")
kvrk_1998 <- merge(kvrk_1998,cpp,by = "PSTRANA")
kvrk_1994 <- merge(kvrk_1994,cpp,by = "PSTRANA")

# identification of parties
parties <- c("KDU-ČSL","KSČM","ČSSD","ODS","ODA","SPR-RSČ","US-DEU","Zelení","VV","TOP 09","ANO","Úsvit","SPD","STAN","Piráti")
parties_lab <- c("KDU-CSL","KSCM","CSSD","ODS","ODA","SPR-RSC","US-DEU","GREENS","VV","TOP 09","ANO","DAWN","SPD","STAN","PIRATES")
parties_num <- NULL
for (i in 1:length(parties)) {
  parties_num[i] <- cpp$PSTRANA[cpp$ZKRATKAP8==parties[i]][1]
}
parties_num[parties_num=="1173"] <- "13" # replace new ODA with the old one
parties_num <- as.numeric(parties_num)
npm_num <- 99 # IND.
oparties_num <- as.numeric(cpp$PSTRANA[!(cpp$PSTRANA %in% parties_num)]) # other parties
oparties_num <- oparties_num[oparties_num!=99]


# population
kvrzcoco_2018 <- kvrzcoco_2018[kvrzcoco_2018$OBVODY!=2,]
kvrzcoco_2018$OBEC <- ifelse(kvrzcoco_2018$ORP==8119 & kvrzcoco_2018$TYPZASTUP==2, 554821,kvrzcoco_2018$OBEC) # Ostrava
kvrzcoco_2018$OBEC <- ifelse(kvrzcoco_2018$ORP==1000 & kvrzcoco_2018$TYPZASTUP==2, 554782,kvrzcoco_2018$OBEC) # Praha
kvrzcoco_2018$OBEC <- ifelse(kvrzcoco_2018$ORP==4214 & kvrzcoco_2018$TYPZASTUP==2, 554804,kvrzcoco_2018$OBEC) # Ústí nad Labem
kvrzcoco_2018$OBEC <- ifelse(kvrzcoco_2018$ORP==3209 & kvrzcoco_2018$TYPZASTUP==2, 554791,kvrzcoco_2018$OBEC) # Plzeň
kvrzcoco_2018$OBEC <- ifelse(kvrzcoco_2018$ORP==5105 & kvrzcoco_2018$TYPZASTUP==2, 563889,kvrzcoco_2018$OBEC) # Liberec
kvrzcoco_2018$OBEC <- ifelse(kvrzcoco_2018$ORP==5309 & kvrzcoco_2018$TYPZASTUP==2, 555134,kvrzcoco_2018$OBEC) # Pardubice
kvrzcoco_2018$OBEC <- ifelse(kvrzcoco_2018$ORP==6203 & kvrzcoco_2018$TYPZASTUP==2, 582786,kvrzcoco_2018$OBEC) # Brno
kvrzcoco_2018$OBEC <- ifelse(kvrzcoco_2018$ORP==8117 & kvrzcoco_2018$TYPZASTUP==2, 505927,kvrzcoco_2018$OBEC) # Opava
towns <- as.numeric(names(table(kvrzcoco_2018$KODZASTUP)[table(kvrzcoco_2018$KODZASTUP)>1]))
kvrzcoco_2018$OBEC <- ifelse(kvrzcoco_2018$KODZASTUP %in% towns, kvrzcoco_2018$KODZASTUP, kvrzcoco_2018$OBEC)
kvrk_2018 <- merge(kvrk_2018,kvrzcoco_2018[!duplicated(kvrzcoco_2018$KODZASTUP),c("KODZASTUP","OBEC")],by = "KODZASTUP")
kvrk_2018 <- merge(kvrk_2018,pop_2018[,c("POPULATION","OBEC")],by = "OBEC")

kvrzcoco_2014 <- kvrzcoco_2014[kvrzcoco_2014$OBVODY!=2,]
kvrzcoco_2014$OBEC <- ifelse(kvrzcoco_2014$OKRES==8106 & kvrzcoco_2014$TYPZASTUP==2, 554821,kvrzcoco_2014$OBEC) # Ostrava
kvrzcoco_2014$OBEC <- ifelse(kvrzcoco_2014$OKRES==1100 & kvrzcoco_2014$TYPZASTUP==2, 554782,kvrzcoco_2014$OBEC) # Praha
kvrzcoco_2014$OBEC <- ifelse(kvrzcoco_2014$OKRES==4207 & kvrzcoco_2014$TYPZASTUP==2, 554804,kvrzcoco_2014$OBEC) # Ústí nad Labem
kvrzcoco_2014$OBEC <- ifelse(kvrzcoco_2014$OKRES==3203 & kvrzcoco_2014$TYPZASTUP==2, 554791,kvrzcoco_2014$OBEC) # Plzeň
kvrzcoco_2014$OBEC <- ifelse(kvrzcoco_2014$OKRES==5103 & kvrzcoco_2014$TYPZASTUP==2, 563889,kvrzcoco_2014$OBEC) # Liberec
kvrzcoco_2014$OBEC <- ifelse(kvrzcoco_2014$OKRES==5302 & kvrzcoco_2014$TYPZASTUP==2, 555134,kvrzcoco_2014$OBEC) # Pardubice
kvrzcoco_2014$OBEC <- ifelse(kvrzcoco_2014$OKRES==6202 & kvrzcoco_2014$TYPZASTUP==2, 582786,kvrzcoco_2014$OBEC) # Brno
kvrzcoco_2014$OBEC <- ifelse(kvrzcoco_2014$OKRES==8105 & kvrzcoco_2014$TYPZASTUP==2, 505927,kvrzcoco_2014$OBEC) # Opava
towns <- as.numeric(names(table(kvrzcoco_2014$KODZASTUP)[table(kvrzcoco_2014$KODZASTUP)>1]))
kvrzcoco_2014$OBEC <- ifelse(kvrzcoco_2014$KODZASTUP %in% towns, kvrzcoco_2014$KODZASTUP, kvrzcoco_2014$OBEC)
kvrk_2014 <- merge(kvrk_2014,kvrzcoco_2014[!duplicated(kvrzcoco_2014$KODZASTUP),c("KODZASTUP","OBEC")],by = "KODZASTUP")
kvrk_2014 <- merge(kvrk_2014,pop_2014[,c("POPULATION","OBEC")],by = "OBEC")

kvrzcoco_2010 <- kvrzcoco_2010[kvrzcoco_2010$OBVODY!=2,]
kvrzcoco_2010$OBEC <- ifelse(kvrzcoco_2010$OKRES==8106 & kvrzcoco_2010$TYPZASTUP==2, 554821,kvrzcoco_2010$OBEC) # Ostrava
kvrzcoco_2010$OBEC <- ifelse(kvrzcoco_2010$OKRES==1100 & kvrzcoco_2010$TYPZASTUP==2, 554782,kvrzcoco_2010$OBEC) # Praha
kvrzcoco_2010$OBEC <- ifelse(kvrzcoco_2010$OKRES==4207 & kvrzcoco_2010$TYPZASTUP==2, 554804,kvrzcoco_2010$OBEC) # Ústí nad Labem
kvrzcoco_2010$OBEC <- ifelse(kvrzcoco_2010$OKRES==3203 & kvrzcoco_2010$TYPZASTUP==2, 554791,kvrzcoco_2010$OBEC) # Plzeň
kvrzcoco_2010$OBEC <- ifelse(kvrzcoco_2010$OKRES==5103 & kvrzcoco_2010$TYPZASTUP==2, 563889,kvrzcoco_2010$OBEC) # Liberec
kvrzcoco_2010$OBEC <- ifelse(kvrzcoco_2010$OKRES==5302 & kvrzcoco_2010$TYPZASTUP==2, 555134,kvrzcoco_2010$OBEC) # Pardubice
kvrzcoco_2010$OBEC <- ifelse(kvrzcoco_2010$OKRES==6202 & kvrzcoco_2010$TYPZASTUP==2, 582786,kvrzcoco_2010$OBEC) # Brno
kvrzcoco_2010$OBEC <- ifelse(kvrzcoco_2010$OKRES==8105 & kvrzcoco_2010$TYPZASTUP==2, 505927,kvrzcoco_2010$OBEC) # Opava
towns <- as.numeric(names(table(kvrzcoco_2010$KODZASTUP)[table(kvrzcoco_2010$KODZASTUP)>1]))
kvrzcoco_2010$OBEC <- ifelse(kvrzcoco_2010$KODZASTUP %in% towns, kvrzcoco_2010$KODZASTUP, kvrzcoco_2010$OBEC)
kvrk_2010 <- merge(kvrk_2010,kvrzcoco_2010[!duplicated(kvrzcoco_2010$KODZASTUP),c("KODZASTUP","OBEC")],by = "KODZASTUP")
kvrk_2010 <- merge(kvrk_2010,pop_2010[,c("POPULATION","OBEC")],by = "OBEC", all.x = T)
for(i in as.numeric(rownames(kvrk_2010[is.na(kvrk_2010$POPULATION),]))){
  kvrk_2010$POPULATION[i] <- kvrzcoco_2010$POCOBYV[kvrzcoco_2010$KODZASTUP==kvrk_2010$KODZASTUP[i]]
}


kvrzcoco_2006 <- kvrzcoco_2006[kvrzcoco_2006$OBVODY!=2,]
kvrzcoco_2006$OBEC <- ifelse(kvrzcoco_2006$OKRES==8106 & kvrzcoco_2006$TYPZASTUP==2, 554821,kvrzcoco_2006$OBEC) # Ostrava
kvrzcoco_2006$OBEC <- ifelse(kvrzcoco_2006$OKRES==1100 & kvrzcoco_2006$TYPZASTUP==2, 554782,kvrzcoco_2006$OBEC) # Praha
kvrzcoco_2006$OBEC <- ifelse(kvrzcoco_2006$OKRES==4207 & kvrzcoco_2006$TYPZASTUP==2, 554804,kvrzcoco_2006$OBEC) # Ústí nad Labem
kvrzcoco_2006$OBEC <- ifelse(kvrzcoco_2006$OKRES==3203 & kvrzcoco_2006$TYPZASTUP==2, 554791,kvrzcoco_2006$OBEC) # Plzeň
kvrzcoco_2006$OBEC <- ifelse(kvrzcoco_2006$OKRES==5103 & kvrzcoco_2006$TYPZASTUP==2, 563889,kvrzcoco_2006$OBEC) # Liberec
kvrzcoco_2006$OBEC <- ifelse(kvrzcoco_2006$OKRES==5302 & kvrzcoco_2006$TYPZASTUP==2, 555134,kvrzcoco_2006$OBEC) # Pardubice
kvrzcoco_2006$OBEC <- ifelse(kvrzcoco_2006$OKRES==6202 & kvrzcoco_2006$TYPZASTUP==2, 582786,kvrzcoco_2006$OBEC) # Brno
kvrzcoco_2006$OBEC <- ifelse(kvrzcoco_2006$OKRES==8105 & kvrzcoco_2006$TYPZASTUP==2, 505927,kvrzcoco_2006$OBEC) # Opava
towns <- as.numeric(names(table(kvrzcoco_2006$KODZASTUP)[table(kvrzcoco_2006$KODZASTUP)>1]))
kvrzcoco_2006$OBEC <- ifelse(kvrzcoco_2006$KODZASTUP %in% towns, kvrzcoco_2006$KODZASTUP, kvrzcoco_2006$OBEC)
kvrk_2006 <- merge(kvrk_2006,kvrzcoco_2006[!duplicated(kvrzcoco_2006$KODZASTUP),c("KODZASTUP","OBEC")],by = "KODZASTUP")
kvrk_2006 <- merge(kvrk_2006,pop_2006[,c("POPULATION","OBEC")],by = "OBEC", all.x = T)
for(i in as.numeric(rownames(kvrk_2006[is.na(kvrk_2006$POPULATION),]))){
  kvrk_2006$POPULATION[i] <- kvrzcoco_2006$POCOBYV[kvrzcoco_2006$KODZASTUP==kvrk_2006$KODZASTUP[i]]
}

kvrk_2002 <- merge(kvrk_2002,kvrzcoco_2006[!duplicated(kvrzcoco_2006$KODZASTUP),c("KODZASTUP","OBEC")],by = "KODZASTUP", all.x = T)
kvrk_2002 <- merge(kvrk_2002,pop_2006[,c("POPULATION","OBEC")],by = "OBEC", all.x = T)

kvrk_1998 <- merge(kvrk_1998,kvrzcoco_2006[!duplicated(kvrzcoco_2006$KODZASTUP),c("KODZASTUP","OBEC")],by = "KODZASTUP", all.x = T)
kvrk_1998 <- merge(kvrk_1998,pop_2006[,c("POPULATION","OBEC")],by = "OBEC", all.x = T)

kvrk_1994 <- merge(kvrk_1994,kvrzcoco_2006[!duplicated(kvrzcoco_2006$KODZASTUP),c("KODZASTUP","OBEC")],by = "KODZASTUP", all.x = T)
kvrk_1994 <- merge(kvrk_1994,pop_2006[,c("POPULATION","OBEC")],by = "OBEC", all.x = T)

# kvrk
kvrk <- list(kvrk_1994,kvrk_1998,kvrk_2002,kvrk_2006,kvrk_2010,kvrk_2014,kvrk_2018)

# unique candidates
kvrk_unique <- rbind(kvrk_1994[,c("IDENTITY","PSTRANA")],kvrk_1998[,c("IDENTITY","PSTRANA")],
                     kvrk_2002[,c("IDENTITY","PSTRANA")],kvrk_2006[,c("IDENTITY","PSTRANA")],
                     kvrk_2010[,c("IDENTITY","PSTRANA")],kvrk_2014[,c("IDENTITY","PSTRANA")],
                     kvrk_2018[,c("IDENTITY","PSTRANA")])
kvrk_unique <- kvrk_unique[!duplicated(kvrk_unique$IDENTITY),]
sort(table(kvrk_unique$PSTRANA),decreasing = T)



#######################################################
############### DESCRIPTIVE ANALYSIS ##################
#######################################################

# number od candidates
pdf("output/large_parties.pdf", width=10, height=5)
options("scipen"=100, "digits"=4)
par(mfrow=c(2,2),oma = c(5,5,0,0) + 0.1,mar = c(0,0,1,1) + 0.1, family="Times") # the largest parties
for (k in c(1,2)) {
  party <- parties_num[k]
  age <- NULL
  for (i in 1:7) {
    age[i] <- length(kvrk[[i]]$VEK[kvrk[[i]]$PSTRANA==party])
  }
  table <- rbind(age,t(as.matrix((members[,parties_lab[k]]))))
  table[2,] <- table[2,] - table[1,]
  x <- barplot(table, axes = FALSE, ylim = c(0,220000), 
       main = "", xlab = "", ylab = "")
  abline(h=seq(0,220000,20000), lty = 'dashed', col = "gray70")
  barplot(table, axes = FALSE, ylim = c(0,220000), 
          main = "", xlab = "", ylab = "", add = T)
  axis(side = 2, at=seq(0,220000,20000),
       labels = if (k %in% c(1)) as.character(seq(0,220,20)) else FALSE, las = 2, cex.axis = 1.3)
  axis(side = 1, x,
       labels = if (k %in% c()) c("1994","1998","2002","2006","2010","2014","2018") else FALSE, cex.axis = 1.3)
  box(which = "plot", bty = "l")
  text(x[4],180000, pos = 3,parties_lab[k], cex = 1.5)
  text(x,table[1,]-5000,round(table[1,]/colSums(table)*100,1), pos = 3, col = "black", cex = 1.1)
} 
for (k in c(3,4)) {
  party <- parties_num[k]
  age <- NULL
  for (i in 1:7) {
    age[i] <- length(kvrk[[i]]$VEK[kvrk[[i]]$PSTRANA==party])
  }
  table <- rbind(age,t(as.matrix((members[,parties_lab[k]]))))
  table[2,] <- table[2,] - table[1,]
  x <- barplot(table, axes = FALSE, ylim = c(0,35000), main = "", xlab = "", ylab = "")
  abline(h=seq(0,35000,5000), lty = 'dashed', col = "gray70")
  barplot(table, axes = FALSE, ylim = c(0,35000), main = "", xlab = "", ylab = "", add=T)
  axis(side = 2, at=seq(0,35000,5000),
       labels = if (k %in% c(3)) as.character(seq(0,35,5)) else FALSE, las = 2, cex.axis = 1.3)
  axis(side = 1, x,
       labels = if (k %in% c(3,4)) c("1994","1998","2002","2006","2010","2014","2018") else FALSE, cex.axis = 1.3)
  box(which = "plot", bty = "l")
  text(x[4],30000, pos = 3,parties_lab[k], cex = 1.5)
  text(x,table[1,],round(table[1,]/colSums(table)*100,1), pos = 3, col = "black", cex = 1.1)
} 
title(xlab = "Year", ylab = "Party members and candidates (thousands)", outer = TRUE, cex.lab = 1.5)
dev.off()


pdf("output/small_parties.pdf", width=10, height=7)
par(mfrow=c(4,3),oma = c(5,5,0,0) + 0.1,mar = c(0,0,1,1) + 0.1, family="Times") # small parties
for (k in c(5,6,7,8,9,10,11,12,13,14,15)) {
  party <- parties_num[k]
  age <- NULL
  for (i in 1:7) {
    age[i] <- length(kvrk[[i]]$VEK[kvrk[[i]]$PSTRANA==party])
  }
  table <- rbind(age,t(as.matrix((members[,parties_lab[k]]))))
  table[2,] <- table[2,] - table[1,]
  labeling <- round(table[1,]/colSums(table)*100,1)
  table <- ifelse(table<0, 0, table)
  x <- barplot(table, axes = FALSE, ylim = c(0,6200), 
               main = "", xlab = "", ylab = "")
  abline(h=seq(0,5000,1000), lty = 'dashed', col = "gray70")
  barplot(table, axes = FALSE, ylim = c(0,5000), main = "", xlab = "", ylab = "", add = T)
  axis(side = 2, at=seq(0,5000,1000),
       labels = if (k %in% c(5,8,11,14)) as.character(seq(0,5,1)) else FALSE, las = 2, cex.axis = 1.3)
  axis(side = 1, x,
       labels = if (k %in% c(13:15)) c("1994","1998","2002","2006","2010","2014","2018") else FALSE, cex.axis = 1.3)
  box(which = "plot", bty = "l")
  text(4,5000, pos = 3,parties_lab[k], cex = 1.8)
  text(x,colSums(table),labeling, pos = 3, col = "black", cex = 1.5)
} 
title(xlab = "Year", ylab = "Party members and candidates (thousands)", outer = TRUE, cex.lab = 1.8)
dev.off()


# kvrk
kvrk_mod <- kvrk
  

# exlusion of some candidates
kvrk_mod[[5]]$PSTRANA <- ifelse(kvrk_mod[[5]]$PSTRANA==768,2,kvrk_mod[[5]]$PSTRANA) # ANO was founded in 2012

kvrk_mod[[6]]$PSTRANA <- ifelse(kvrk_mod[[6]]$PSTRANA==792,2,kvrk_mod[[6]]$PSTRANA) # DAWN had less than 50 candidates in 2014

kvrk_mod[[4]]$PSTRANA <- ifelse(kvrk_mod[[4]]$PSTRANA==144,2,kvrk_mod[[4]]$PSTRANA) # VV had less than 50 candidates in 2006
kvrk_mod[[3]]$PSTRANA <- ifelse(kvrk_mod[[3]]$PSTRANA==144,2,kvrk_mod[[3]]$PSTRANA) # VV had less than 50 candidates in 2002

kvrk_mod[[5]]$PSTRANA <- ifelse(kvrk_mod[[5]]$PSTRANA==11,2,kvrk_mod[[5]]$PSTRANA) # SPR-RSC had less than 50 candidates in 2006
kvrk_mod[[3]]$PSTRANA <- ifelse(kvrk_mod[[3]]$PSTRANA==11,2,kvrk_mod[[3]]$PSTRANA) # SPR-RSC had less than 50 candidates in 2002

kvrk_mod[[5]]$PSTRANA <- ifelse(kvrk_mod[[5]]$PSTRANA==166,2,kvrk_mod[[5]]$PSTRANA) # STAN had less than 50 candidates in 2010
kvrk_mod[[4]]$PSTRANA <- ifelse(kvrk_mod[[4]]$PSTRANA==166,2,kvrk_mod[[4]]$PSTRANA) # STAN had less than 50 candidates in 2006

kvrk_mod[[4]]$PSTRANA <- ifelse(kvrk_mod[[4]]$PSTRANA==13,2,kvrk_mod[[4]]$PSTRANA) # ODA had less than 50 candidates in 2006

kvrk_mod[[5]]$PSTRANA <- ifelse(kvrk_mod[[5]]$PSTRANA==102,2,kvrk_mod[[5]]$PSTRANA) # US-DEU had less than 50 candidates in 2010


# exclude DAWN from the parties and include it as the other party
parties <- parties[-12]
parties_lab <- parties_lab[-12]
parties_num <- parties_num[-12]
oparties_num <- c(oparties_num,792)


# age of party members
pdf("output/age.pdf", width=10, height=7)
age_total <- matrix(NA, 16, 7)
dimnames(age_total) <- list(c(parties_lab,"OTHERS","IND."), c("1994","1998","2002","2006","2010","2014","2018"))
par(mfrow=c(4,4),oma = c(5,4,0,0) + 0.1,mar = c(0,0,1,1) + 0.1, family="Times")
for (k in 1:length(parties)) {
  party <- parties_num[k]
  age  <- age_lower <- age_upper <- NULL
  for (i in 1:7) {
    age[i] <- mean(kvrk_mod[[i]]$VEK[kvrk_mod[[i]]$PSTRANA==party])
    ages <- kvrk_mod[[i]]$VEK[kvrk_mod[[i]]$PSTRANA==party]
    bootmeans <- rep(NA,1000)
    for (x in 1:1000){
      bootmeans[x] <- mean(sample(x=ages,size=length(ages),replace=T))
    }
    age_lower[i] <- if (is.nan(age[i])) NaN else quantile(bootmeans,c(0.005))
    age_upper[i] <- if (is.nan(age[i])) NaN else quantile(bootmeans,c(0.995))
  }
  plot(1:7, age, axes = FALSE, type = "b", ylim = c(30,70), 
       main = "", xlab = "", ylab = "", lwd = 2, pch = 4)
  polygon(x = c(1:7,rev(1:7))[!is.nan(c(age_lower,rev(age_upper)))], 
          y = c(age_lower,rev(age_upper))[!is.nan(c(age_lower,rev(age_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
  axis(side = 2, at=seq(30,70,10),
       labels = if (k %in% c(1,5,9,13)) as.character(seq(30,70,10)) else FALSE, las = 2, cex.axis = 1.3)
  axis(side = 1, at=1:7,
       labels = if (k %in% c(13,14)) c("1994","1998","2002","2006","2010","2014","2018") else FALSE, cex.axis = 1.3)
  box(which = "plot", bty = "l")
  grid(col = "gray60")
  text(4,60, pos = 3,parties_lab[k], cex = 1.5)
  age_total[k,] <- round(age,2)
}
age  <- age_lower <- age_upper <- NULL
for (i in 1:7) {
  age[i] <- mean(kvrk_mod[[i]]$VEK[kvrk_mod[[i]]$PSTRANA %in% oparties_num])
  ages <- kvrk_mod[[i]]$VEK[kvrk_mod[[i]]$PSTRANA %in% oparties_num]
  bootmeans <- rep(NA,1000)
  for (x in 1:1000){
    bootmeans[x] <- mean(sample(x=ages,size=length(ages),replace=T))
  }
  age_lower[i] <- if (is.nan(age[i])) NaN else quantile(bootmeans,c(0.005))
  age_upper[i] <- if (is.nan(age[i])) NaN else quantile(bootmeans,c(0.995))
}
plot(1:7, age, axes = FALSE, type = "b", ylim = c(30,70), 
     main = "", xlab = "", ylab = "", lwd = 2, pch = 4)
polygon(x = c(1:7,rev(1:7))[!is.nan(c(age_lower,rev(age_upper)))], 
        y = c(age_lower,rev(age_upper))[!is.nan(c(age_lower,rev(age_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
axis(side = 2, at=seq(30,70,10), labels = FALSE, las = 2, cex.axis = 1.3)
axis(side = 1, at=1:7, labels = c("1994","1998","2002","2006","2010","2014","2018"), cex.axis = 1.3)
box(which = "plot", bty = "l")
grid(col = "gray60")
text(4,60, pos = 3, "OTHERS", cex = 1.5)
age_total[15,] <- round(age,2)
age  <- age_lower <- age_upper <- NULL
for (i in 1:7) {
  age[i] <- mean(kvrk_mod[[i]]$VEK[kvrk_mod[[i]]$PSTRANA==99])
  ages <- kvrk_mod[[i]]$VEK[kvrk_mod[[i]]$PSTRANA==99]
  bootmeans <- rep(NA,10)
  for (x in 1:10){
    bootmeans[x] <- mean(sample(x=ages,size=length(ages),replace=T))
  }
  age_lower[i] <- if (is.nan(age[i])) NaN else quantile(bootmeans,c(0.005))
  age_upper[i] <- if (is.nan(age[i])) NaN else quantile(bootmeans,c(0.995))
}
plot(1:7, age, axes = FALSE, type = "b", ylim = c(30,70), 
     main = "", xlab = "", ylab = "", lwd = 2, pch = 4)
polygon(x = c(1:7,rev(1:7))[!is.nan(c(age_lower,rev(age_upper)))], 
        y = c(age_lower,rev(age_upper))[!is.nan(c(age_lower,rev(age_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
axis(side = 2, at=seq(30,70,10), labels = FALSE, cex.axis = 1.3)
axis(side = 1, at=1:7, labels = c("1994","1998","2002","2006","2010","2014","2018"), cex.axis = 1.3)
box(which = "plot", bty = "l")
grid(col = "gray60")
text(4,60, pos = 3, "INDEPENDENTS", cex = 1.5)
title(xlab = "Year", ylab = "Average age of party candidates", outer = TRUE, cex.lab = 1.5)
age_total[16,] <- round(age,2)
dev.off()


# education and gender of party members
pdf("output/gender.pdf", width=10, height=7)
par(mfrow=c(4,4),oma = c(5,4,0,0) + 0.1,mar = c(0,0,1,1) + 0.1, family="Times")
gender_total <- matrix(NA, 16, 7)
dimnames(gender_total) <- list(c(parties_lab,"OTHERS","IND."), c("1994","1998","2002","2006","2010","2014","2018"))
education_total <- matrix(NA, 16, 7)
dimnames(education_total) <- list(c(parties_lab,"OTHERS","IND."), c("1994","1998","2002","2006","2010","2014","2018"))
for (k in 1:length(parties)) {
  party <- parties_num[k]
  gender  <- gender_lower <- gender_upper <- NULL
  for (i in 1:7) {
    gender[i] <- sum(kvrk_mod[[i]]$ZENA[kvrk_mod[[i]]$PSTRANA==party])/length(kvrk_mod[[i]]$ZENA[kvrk_mod[[i]]$PSTRANA==party])*100
    genders <- kvrk_mod[[i]]$ZENA[kvrk_mod[[i]]$PSTRANA==party]
    bootmeans <- rep(NA,1000)
    for (x in 1:1000){
      bootmeans[x] <- sum(sample(x=genders,size=length(genders),replace=T))/length(genders)*100
    }
    gender_lower[i] <- if (is.nan(gender[i])) NaN else quantile(bootmeans,c(0.005))
    gender_upper[i] <- if (is.nan(gender[i])) NaN else quantile(bootmeans,c(0.995))
  }
  plot(1:7, gender, axes = FALSE, type = "b", ylim = c(0,80), 
       main = "", xlab = "", ylab = "", lwd = 2, pch = 1)
  polygon(x = c(1:7,rev(1:7))[!is.nan(c(gender_lower,rev(gender_upper)))], 
          y = c(gender_lower,rev(gender_upper))[!is.nan(c(gender_lower,rev(gender_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
  education  <- education_lower <- education_upper <- NULL
  for (i in 1:7) {
    education[i] <- sum(kvrk_mod[[i]]$VS[kvrk_mod[[i]]$PSTRANA==party])/length(kvrk_mod[[i]]$VS[kvrk_mod[[i]]$PSTRANA==party])*100
    educations <- kvrk_mod[[i]]$VS[kvrk_mod[[i]]$PSTRANA==party]
    bootmeans <- rep(NA,1000)
    for (x in 1:1000){
      bootmeans[x] <- sum(sample(x=educations,size=length(educations),replace=T))/length(educations)*100
    }
    education_lower[i] <- if (is.nan(education[i])) NaN else quantile(bootmeans,c(0.005))
    education_upper[i] <- if (is.nan(education[i])) NaN else quantile(bootmeans,c(0.995))
  }
  lines(1:7, education, type = "b", lwd = 2, col = "black", pch = 4)
  polygon(x = c(1:7,rev(1:7))[!is.nan(c(education_lower,rev(education_upper)))], 
          y = c(education_lower,rev(education_upper))[!is.nan(c(education_lower,rev(education_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
  axis(side = 2, at=seq(0,80,20),
       labels = if (k %in% c(1,5,9,13)) as.character(seq(0,80,20)) else FALSE, las = 2, cex.axis = 1.3)
  axis(side = 1, at=1:7,
       labels = if (k %in% c(13,14)) c("1994","1998","2002","2006","2010","2014","2018") else FALSE, cex.axis = 1.3)
  box(which = "plot", bty = "l")
  grid(col = "gray60")
  text(4,60, pos = 3,parties_lab[k], cex = 1.5)
  gender_total[k,] <- round(gender,2)
  education_total[k,] <- round(education,2)
}
gender  <- gender_lower <- gender_upper <- NULL
for (i in 1:7) {
  gender[i] <- sum(kvrk_mod[[i]]$ZENA[kvrk_mod[[i]]$PSTRANA %in% oparties_num])/length(kvrk_mod[[i]]$ZENA[kvrk_mod[[i]]$PSTRANA %in% oparties_num])*100
  genders <- kvrk_mod[[i]]$ZENA[kvrk_mod[[i]]$PSTRANA %in% oparties_num]
  bootmeans <- rep(NA,1000)
  for (x in 1:1000){
    bootmeans[x] <- sum(sample(x=genders,size=length(genders),replace=T))/length(genders)*100
  }
  gender_lower[i] <- if (is.nan(gender[i])) NaN else quantile(bootmeans,c(0.005))
  gender_upper[i] <- if (is.nan(gender[i])) NaN else quantile(bootmeans,c(0.995))
}
plot(1:7, gender, axes = FALSE, type = "b", ylim = c(0,80), 
     main = "", xlab = "", ylab = "", lwd = 2, pch = 1)
polygon(x = c(1:7,rev(1:7))[!is.nan(c(gender_lower,rev(gender_upper)))], 
        y = c(gender_lower,rev(gender_upper))[!is.nan(c(gender_lower,rev(gender_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
education  <- education_lower <- education_upper <- NULL
for (i in 1:7) {
  education[i] <- sum(kvrk_mod[[i]]$VS[kvrk_mod[[i]]$PSTRANA %in% oparties_num])/length(kvrk_mod[[i]]$VS[kvrk_mod[[i]]$PSTRANA %in% oparties_num])*100
  educations <- kvrk_mod[[i]]$VS[kvrk_mod[[i]]$PSTRANA %in% oparties_num]
  bootmeans <- rep(NA,1000)
  for (x in 1:1000){
    bootmeans[x] <- sum(sample(x=educations,size=length(educations),replace=T))/length(educations)*100
  }
  education_lower[i] <- if (is.nan(education[i])) NaN else quantile(bootmeans,c(0.005))
  education_upper[i] <- if (is.nan(education[i])) NaN else quantile(bootmeans,c(0.995))
}
lines(1:7, education, type = "b", lwd = 2, col = "black", pch = 4)
polygon(x = c(1:7,rev(1:7))[!is.nan(c(education_lower,rev(education_upper)))], 
        y = c(education_lower,rev(education_upper))[!is.nan(c(education_lower,rev(education_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
axis(side = 2, at=seq(0,80,10), labels = FALSE, las = 2, cex.axis = 1.3)
axis(side = 1, at=1:7, labels = c("1994","1998","2002","2006","2010","2014","2018"), cex.axis = 1.3)
box(which = "plot", bty = "l")
grid(col = "gray60")
text(4,60, pos = 3, "OTHERS", cex = 1.5)
gender_total[15,] <- round(gender,2)
education_total[15,] <- round(education,2)
gender  <- gender_lower <- gender_upper <- NULL
for (i in 1:7) {
  gender[i] <- sum(kvrk_mod[[i]]$ZENA[kvrk_mod[[i]]$PSTRANA==99])/length(kvrk_mod[[i]]$ZENA[kvrk_mod[[i]]$PSTRANA==99])*100
  genders <- kvrk_mod[[i]]$ZENA[kvrk_mod[[i]]$PSTRANA==99]
  bootmeans <- rep(NA,10)
  for (x in 1:10){
    bootmeans[x] <- sum(sample(x=genders,size=length(genders),replace=T))/length(genders)*100
  }
  gender_lower[i] <- if (is.nan(gender[i])) NaN else quantile(bootmeans,c(0.005))
  gender_upper[i] <- if (is.nan(gender[i])) NaN else quantile(bootmeans,c(0.995))
}
plot(1:7, gender, axes = FALSE, type = "b", ylim = c(0,80), 
     main = "", xlab = "", ylab = "", lwd = 2, pch = 1)
polygon(x = c(1:7,rev(1:7))[!is.nan(c(gender_lower,rev(gender_upper)))], 
        y = c(gender_lower,rev(gender_upper))[!is.nan(c(gender_lower,rev(gender_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
education  <- education_lower <- education_upper <- NULL
for (i in 1:7) {
  education[i] <- sum(kvrk_mod[[i]]$VS[kvrk_mod[[i]]$PSTRANA==99])/length(kvrk_mod[[i]]$VS[kvrk_mod[[i]]$PSTRANA==99])*100
  educations <- kvrk_mod[[i]]$VS[kvrk_mod[[i]]$PSTRANA==99]
  bootmeans <- rep(NA,10)
  for (x in 1:10){
    bootmeans[x] <- sum(sample(x=educations,size=length(educations),replace=T))/length(educations)*100
  }
  education_lower[i] <- if (is.nan(education[i])) NaN else quantile(bootmeans,c(0.005))
  education_upper[i] <- if (is.nan(education[i])) NaN else quantile(bootmeans,c(0.995))
}
lines(1:7, education, type = "b", lwd = 2, col = "black", pch = 4)
polygon(x = c(1:7,rev(1:7))[!is.nan(c(education_lower,rev(education_upper)))], 
        y = c(education_lower,rev(education_upper))[!is.nan(c(education_lower,rev(education_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
axis(side = 2, at=seq(0,80,10), labels = FALSE, cex.axis = 1.3)
axis(side = 1, at=1:7, labels = c("1994","1998","2002","2006","2010","2014","2018"), cex.axis = 1.3)
box(which = "plot", bty = "l")
grid(col = "gray60")
text(4,60, pos = 3, "INDEPENDENTS", cex = 1.5)
title(xlab = "Year", ylab = "Gender and education of party candidates (per cent of female and university-educated)", outer = TRUE, cex.lab = 1.5)
gender_total[16,] <- round(gender,2)
education_total[16,] <- round(education,2)
dev.off()


# population
pdf("output/population.pdf", width=10, height=7)
population_total <- matrix(NA, 16, 7)
dimnames(population_total) <- list(c(parties_lab,"OTHERS","IND."), c("1994","1998","2002","2006","2010","2014","2018"))
par(mfrow=c(4,4),oma = c(5,4,0,0) + 0.1,mar = c(0,0,1,1) + 0.1, family="Times")
for (k in 1:length(parties)) {
  party <- parties_num[k]
  population  <- population_lower <- population_upper <- NULL
  for (i in 1:7) {
    population[i] <- mean(kvrk_mod[[i]]$POPULATION[kvrk_mod[[i]]$PSTRANA==party & !is.na(kvrk_mod[[i]]$POPULATION)])
    populations <- kvrk_mod[[i]]$POPULATION[kvrk_mod[[i]]$PSTRANA==party & !is.na(kvrk_mod[[i]]$POPULATION)]
    bootmeans <- rep(NA,1000)
    for (x in 1:1000){
      bootmeans[x] <- mean(sample(x=populations,size=length(populations),replace=T))
    }
    population_lower[i] <- if (is.nan(population[i])) NaN else quantile(bootmeans,c(0.005))
    population_upper[i] <- if (is.nan(population[i])) NaN else quantile(bootmeans,c(0.995))
  }
  x <- barplot(population, axes = FALSE, ylim = c(0,600000), 
       main = "", xlab = "", ylab = "", lwd = 2, pch = 4)
  arrows(x0=x,y0=population_upper,y1=population_lower,angle=90,code=3,length=0.03)
  axis(side = 2, at=seq(0,600000,100000),
       labels = if (k %in% c(1,5,9,13)) as.character(seq(0,600,100)) else FALSE, las = 2, cex.axis = 1.3)
  axis(side = 1, at=x,
       labels = if (k %in% c(13,14)) c("1994","1998","2002","2006","2010","2014","2018") else FALSE, cex.axis = 1.3)
  box(which = "plot", bty = "l")
  abline(h=c(seq(0,600000,100000)), col = "gray70", lt = "dashed")
  text(4,500000, pos = 3, parties_lab[k], cex = 1.5)
  population_total[k,] <- round(population,2)
}
population  <- population_lower <- population_upper <- NULL
for (i in 1:7) {
  population[i] <- mean(kvrk_mod[[i]]$POPULATION[kvrk_mod[[i]]$PSTRANA %in% oparties_num & !is.na(kvrk_mod[[i]]$POPULATION)])
  populations <- kvrk_mod[[i]]$POPULATION[kvrk_mod[[i]]$PSTRANA %in% oparties_num & !is.na(kvrk_mod[[i]]$POPULATION)]
  bootmeans <- rep(NA,1000)
  for (x in 1:1000){
    bootmeans[x] <- mean(sample(x=populations,size=length(populations),replace=T))
  }
  population_lower[i] <- if (is.nan(population[i])) NaN else quantile(bootmeans,c(0.005))
  population_upper[i] <- if (is.nan(population[i])) NaN else quantile(bootmeans,c(0.995))
}
x <- barplot(population, axes = FALSE, ylim = c(0,600000), 
     main = "", xlab = "", ylab = "", lwd = 2, pch = 4)
arrows(x0=x,y0=population_upper,y1=population_lower,angle=90,code=3,length=0.03)
axis(side = 2, at=seq(0,600000,100000), labels = FALSE, las = 2, cex.axis = 1.3)
axis(side = 1, at=x, labels = c("1994","1998","2002","2006","2010","2014","2018"), cex.axis = 1.3)
box(which = "plot", bty = "l")
abline(h=c(seq(0,600000,100000)), col = "gray70", lt = "dashed")
text(4,500000, pos = 3, "OTHERS", cex = 1.5)
population_total[15,] <- round(population,2)
population  <- population_lower <- population_upper <- NULL
for (i in 1:7) {
  population[i] <- mean(kvrk_mod[[i]]$POPULATION[kvrk_mod[[i]]$PSTRANA==99 & !is.na(kvrk_mod[[i]]$POPULATION)])
  populations <- kvrk_mod[[i]]$POPULATION[kvrk_mod[[i]]$PSTRANA==99 & !is.na(kvrk_mod[[i]]$POPULATION)]
  bootmeans <- rep(NA,10)
  for (x in 1:10){
    bootmeans[x] <- mean(sample(x=populations,size=length(populations),replace=T))
  }
  population_lower[i] <- if (is.nan(population[i])) NaN else quantile(bootmeans,c(0.005))
  population_upper[i] <- if (is.nan(population[i])) NaN else quantile(bootmeans,c(0.995))
}
x <- barplot(population, axes = FALSE, ylim = c(0,600000), 
     main = "", xlab = "", ylab = "", lwd = 2, pch = 4)
arrows(x0=x,y0=population_upper,y1=population_lower,angle=90,code=3,length=0.03)
axis(side = 2, at=seq(0,600000,100000), labels = FALSE, cex.axis = 1.3)
axis(side = 1, at=x, labels = c("1994","1998","2002","2006","2010","2014","2018"), cex.axis = 1.3)
box(which = "plot", bty = "l")
abline(h=c(seq(0,600000,100000)), col = "gray70", lt = "dashed")
#text(4,if (!is.nan(population[4])) population[4]+200000 else 500000, "INDEPENDENTS", cex = 1.5)
text(4,500000, pos = 3, "INDEPENDENTS", cex = 1.5)
title(xlab = "Year", ylab = "Average city population of party candidates' residence (thousands)", outer = TRUE, cex.lab = 1.5)
population_total[16,] <- round(population,2)
dev.off()





#######################################################
############### ANALYSIS OF PARTY CHANGES #############
#######################################################

# change names of columns
colnames(kvrk_2018) <- paste0(colnames(kvrk_2018),"_2018")
colnames(kvrk_2014) <- paste0(colnames(kvrk_2014),"_2014")
colnames(kvrk_2010) <- paste0(colnames(kvrk_2010),"_2010")
colnames(kvrk_2006) <- paste0(colnames(kvrk_2006),"_2006")
colnames(kvrk_2002) <- paste0(colnames(kvrk_2002),"_2002")
colnames(kvrk_1998) <- paste0(colnames(kvrk_1998),"_1998")
colnames(kvrk_1994) <- paste0(colnames(kvrk_1994),"_1994")

# combination of datasets
cand_2014_2018 <- merge(kvrk_2014,kvrk_2018,by.x = "IDENTITY_2014", by.y = "IDENTITY_2018")
cand_2014_2018 <- cand_2014_2018[!duplicated(cand_2014_2018[,c("IDENTITY_2014")]), ]
cand_2014_2018$PARTY_CHANGE <- as.numeric(cand_2014_2018$PSTRANA_2018 != cand_2014_2018$PSTRANA_2014)
cand_2010_2014 <- merge(kvrk_2010,kvrk_2014,by.x = "IDENTITY_2010", by.y = "IDENTITY_2014")
cand_2010_2014 <- cand_2010_2014[!duplicated(cand_2010_2014[,c("IDENTITY_2010")]), ]
cand_2010_2014$PARTY_CHANGE <- as.numeric(cand_2010_2014$PSTRANA_2014 != cand_2010_2014$PSTRANA_2010)
cand_2006_2010 <- merge(kvrk_2006,kvrk_2010,by.x = "IDENTITY_2006", by.y = "IDENTITY_2010")
cand_2006_2010 <- cand_2006_2010[!duplicated(cand_2006_2010[,c("IDENTITY_2006")]), ]
cand_2006_2010$PARTY_CHANGE <- as.numeric(cand_2006_2010$PSTRANA_2010 != cand_2006_2010$PSTRANA_2006)
cand_2002_2006 <- merge(kvrk_2002,kvrk_2006,by.x = "IDENTITY_2002", by.y = "IDENTITY_2006")
cand_2002_2006 <- cand_2002_2006[!duplicated(cand_2002_2006[,c("IDENTITY_2002")]), ]
cand_2002_2006$PARTY_CHANGE <- as.numeric(cand_2002_2006$PSTRANA_2006 != cand_2002_2006$PSTRANA_2002)
cand_1998_2002 <- merge(kvrk_1998,kvrk_2002,by.x = "IDENTITY_1998", by.y = "IDENTITY_2002")
cand_1998_2002 <- cand_1998_2002[!duplicated(cand_1998_2002[,c("IDENTITY_1998")]), ]
cand_1998_2002$PARTY_CHANGE <- as.numeric(cand_1998_2002$PSTRANA_2002 != cand_1998_2002$PSTRANA_1998)
cand_1994_1998 <- merge(kvrk_1994,kvrk_1998,by.x = "IDENTITY_1994", by.y = "IDENTITY_1998")
cand_1994_1998 <- cand_1994_1998[!duplicated(cand_1994_1998[,c("IDENTITY_1994")]), ]
cand_1994_1998$PARTY_CHANGE <- as.numeric(cand_1994_1998$PSTRANA_1998 != cand_1994_1998$PSTRANA_1994)

# moves between parties
moving <- function(cand,kvrk1,kvrk2){
  party_moves <- as.data.frame(matrix(0,nrow(cpp),nrow(cpp)))
  colnames(party_moves) <- cpp$PSTRANA
  rownames(party_moves) <- cpp$PSTRANA
  for(i in 1:nrow(cand)){
    old <- cand[,c(paste0("PSTRANA_",substr(deparse(substitute(cand)),6,9)),paste0("PSTRANA_",substr(deparse(substitute(cand)),11,14)))][i,1]
    new <- cand[,c(paste0("PSTRANA_",substr(deparse(substitute(cand)),6,9)),paste0("PSTRANA_",substr(deparse(substitute(cand)),11,14)))][i,2]
    party_moves[as.character(old),as.character(new)] <- party_moves[as.character(old),as.character(new)] + 1
  }
  party_moves$INACTIVE <- 0
  data <- kvrk1[!(kvrk1[,c(paste0("IDENTITY","_", substr(deparse(substitute(cand)),6,9)))] %in% kvrk2[,c(paste0("IDENTITY","_", substr(deparse(substitute(cand)),11,14)))]),]
  for(i in 1:nrow(party_moves)){
    party_moves[i,ncol(party_moves)] <- sum(data[,c(paste0("PSTRANA","_", substr(deparse(substitute(cand)),6,9)))]==rownames(party_moves[i,]))
  }
  party_moves <- rbind(party_moves,NA)
  rownames(party_moves)[nrow(party_moves)] <- "INACTIVE"
  data <- kvrk2[!(kvrk2[,c(paste0("IDENTITY","_", substr(deparse(substitute(cand)),11,14)))] %in% kvrk1[,c(paste0("IDENTITY","_", substr(deparse(substitute(cand)),6,9)))]),]
  for(i in 1:(ncol(party_moves)-1)){
    party_moves[nrow(party_moves),i] <- sum(data[,c(paste0("PSTRANA","_", substr(deparse(substitute(cand)),11,14)))]==colnames(party_moves)[i])
  }
  party_moves$'OTHERS' <- rowSums(party_moves[,as.character(oparties_num)])
  party_moves <- party_moves[,-which(names(party_moves) %in% as.character(oparties_num))]
  party_moves <- rbind(party_moves,colSums(party_moves[as.character(oparties_num),]))
  party_moves <- party_moves[-which(rownames(party_moves) %in% as.character(oparties_num)),]
  rownames(party_moves)[length(rownames(party_moves))] <- "OTHERS"
  for (i in 1:length(parties_num)) {
    colnames(party_moves)[colnames(party_moves) %in% as.character(parties_num)[i]] <- parties_lab[i]
  }
  for (i in 1:length(parties_num)) {
    rownames(party_moves)[rownames(party_moves) %in% as.character(parties_num)[i]] <- parties_lab[i]
  }
  colnames(party_moves)[colnames(party_moves) == "99"] <- "IND."
  rownames(party_moves)[rownames(party_moves) == "99"] <- "IND."
  colnames(party_moves) <- paste0(colnames(party_moves)," (", substr(deparse(substitute(cand)),11,14),")")
  rownames(party_moves) <- paste0(rownames(party_moves)," (", substr(deparse(substitute(cand)),6,9),")")
  party_moves <- party_moves[,c(colnames(party_moves[,1:(ncol(party_moves)-2)]),paste0("OTHERS"," (", substr(deparse(substitute(cand)),11,14),")"),paste0("INACTIVE"," (", substr(deparse(substitute(cand)),11,14),")"))]
  party_moves <- party_moves[c(rownames(party_moves[1:(nrow(party_moves)-2),]),paste0("OTHERS"," (", substr(deparse(substitute(cand)),6,9),")"),paste0("INACTIVE"," (", substr(deparse(substitute(cand)),6,9),")")),]
  party_moves_1 <- party_moves[1:16,1:16]
  party_moves_exit <- party_moves
  party_moves_exit$total <- rowSums(party_moves_exit)
  for(i in 1:(ncol(party_moves_exit)-1)){
    party_moves_exit[,i] <- round(party_moves_exit[,i]/party_moves_exit[,ncol(party_moves_exit)]*100,1)
  }
  party_moves_exit$total <- round(rowSums(party_moves_exit[,1:(ncol(party_moves_exit)-1)]),1)
  party_moves_entry <- party_moves
  party_moves_entry <- rbind(party_moves_entry,colSums(party_moves_entry))
  for(i in 1:(nrow(party_moves_entry)-1)){
    party_moves_entry[i,] <- round(party_moves_entry[i,]/party_moves_entry[nrow(party_moves_entry),]*100,1)
  }
  party_moves_entry[nrow(party_moves_entry),] <- round(colSums(party_moves_entry[1:(nrow(party_moves_entry))-1,]),1)
  party_moves_exit_1 <- party_moves_1
  party_moves_exit_1$total <- rowSums(party_moves_exit_1)
  for(i in 1:(ncol(party_moves_exit_1)-1)){
    party_moves_exit_1[,i] <- round(party_moves_exit_1[,i]/party_moves_exit_1[,ncol(party_moves_exit_1)]*100,1)
  }
  party_moves_exit_1$total <- round(rowSums(party_moves_exit_1[,1:(ncol(party_moves_exit_1)-1)]),1)
  party_moves_entry_1 <- party_moves_1
  party_moves_entry_1 <- rbind(party_moves_entry_1,colSums(party_moves_entry_1))
  for(i in 1:(nrow(party_moves_entry_1)-1)){
    party_moves_entry_1[i,] <- round(party_moves_entry_1[i,]/party_moves_entry_1[nrow(party_moves_entry_1),]*100,1)
  }
  party_moves_entry_1[nrow(party_moves_entry_1),] <- round(colSums(party_moves_entry_1[1:(nrow(party_moves_entry_1))-1,]),1)
  party_moves_1994_1998 <- list(party_moves,party_moves_exit,party_moves_entry,party_moves_1,party_moves_exit_1,party_moves_entry_1)
  rm(party_moves,party_moves_exit,party_moves_entry,new,old,party_moves_1,party_moves_exit_1,party_moves_entry_1,data)
  return(party_moves_1994_1998)
}


party_moves_2014_2018 <- moving(cand_2014_2018,kvrk_2014,kvrk_2018)
party_moves_2010_2014 <- moving(cand_2010_2014,kvrk_2010,kvrk_2014)
party_moves_2006_2010 <- moving(cand_2006_2010,kvrk_2006,kvrk_2010)
party_moves_2002_2006 <- moving(cand_2002_2006,kvrk_2002,kvrk_2006)
party_moves_1998_2002 <- moving(cand_1998_2002,kvrk_1998,kvrk_2002)
party_moves_1994_1998 <- moving(cand_1994_1998,kvrk_1994,kvrk_1998)


# sankeyNetwork
party_moves <- cbind(rownames(party_moves),party_moves)
party_moves <- gather(party_moves, condition, flow, 'KDU-CSL (2018)':'OTHERS (2018)', factor_key=TRUE)
colnames(party_moves) <- c("old_party","new_party","members")
nodes=data.frame(name=c(as.character(party_moves$old_party), as.character(party_moves$new_party)) %>% unique())
party_moves$IDsource=match(party_moves$old_party, nodes$name)-1 
party_moves$IDtarget=match(party_moves$new_party, nodes$name)-1
sankeyNetwork(Links = party_moves[party_moves$members!=0,], Nodes = nodes,
              Source = 'IDsource', Target = 'IDtarget',
              Value = 'members', NodeID = "name", units = "people", 
              sinksRight=TRUE)




# where did members come from
pdf("output/from.pdf", width=10, height=6)
par(mfrow=c(1,1),oma = c(8,3,0,0) + 0.1,mar = c(0,0,1,1) + 0.1, family="Times")
data_bar <- as.matrix(party_moves_1994_1998[[3]][,c("US-DEU (1998)")][-18],16,1)
rownames(data_bar) <- rownames(party_moves_1994_1998[[3]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "US-DEU (1998)"
x <- barplot(data_bar, xlim = c(0,15), width = 1.4, las = 2)
labeling <- function(){y <- c(0,data_bar[data_bar[,1]>=3,1])
for (i in length(y):2) {
  y[i] <- y[i]/2 + sum(y[1:i-1])}
text(x,y[2:length(y)]+1, labels = paste0(substr(rownames(data_bar)[data_bar[,1]>=3],1,nchar(rownames(data_bar)[data_bar[,1]>=3])-7)), 
     pos = 1, cex = 0.9, col = "white")
text(x,y[2:length(y)]+1, labels = paste0(data_bar[data_bar>=3,1]," %"), 
     cex = 0.9, col = "white")}
labeling()
data_bar <- as.matrix(party_moves_2002_2006[[3]][,c("GREENS (2006)")][-18],16,1)
rownames(data_bar) <- rownames(party_moves_2002_2006[[3]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "GREENS (2006)"
x <- barplot(data_bar, space = 1.5, add = T, width = 1.4, las = 2, axes = FALSE)
labeling()
data_bar <- as.matrix(party_moves_2006_2010[[3]][,c("VV (2010)")][-18],16,1)
rownames(data_bar) <- rownames(party_moves_2006_2010[[3]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "VV (2010)"
x <- barplot(data_bar, space = 2.8, add = T, width = 1.4, las = 2, axes = FALSE)
labeling()
data_bar <- as.matrix(party_moves_2006_2010[[3]][,c("TOP 09 (2010)")][-18],16,1)
rownames(data_bar) <- rownames(party_moves_2006_2010[[3]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "TOP 09 (2010)"
x <- barplot(data_bar, space = 4.1, add = T, width = 1.4, las = 2, axes = FALSE)
labeling()
data_bar <- as.matrix(party_moves_2010_2014[[3]][,c("ANO (2014)")][-18],16,1)
rownames(data_bar) <- rownames(party_moves_2010_2014[[3]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "ANO (2014)"
x <- barplot(data_bar, space = 5.4, add = T, width = 1.4, las = 2, axes = FALSE)
labeling()
data_bar <- as.matrix(party_moves_2014_2018[[3]][,c("ANO (2018)")][-18],16,1)
rownames(data_bar) <- rownames(party_moves_2014_2018[[3]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "ANO (2018)"
x <- barplot(data_bar, space = 6.7, add = T, width = 1.4, las = 2, axes = FALSE)
labeling()
data_bar <- as.matrix(party_moves_2014_2018[[3]][,c("SPD (2018)")][-18],16,1)
rownames(data_bar) <- rownames(party_moves_2014_2018[[3]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "SPD (2018)"
x <- barplot(data_bar, space = 8, add = T, width = 1.4, las = 2, axes = FALSE)
labeling()
data_bar <- as.matrix(party_moves_2014_2018[[3]][,c("PIRATES (2018)")][-18],16,1)
rownames(data_bar) <- rownames(party_moves_2014_2018[[3]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "PIRATES (2018)"
x <- barplot(data_bar, space = 9.3, add = T, width = 1.4, las = 2, axes = FALSE)
labeling()
dev.off()


# where did members leave
pdf("output/to.pdf", width=10, height=6)
par(mfrow=c(1,1),oma = c(0,0,0,0) + 0.1,mar = c(0,0,1,1) + 0.1, family="Times")
data_bar <- t(as.matrix(party_moves_2014_2018[[2]][c("TOP 09 (2014)"),][-18],16,1))
rownames(data_bar) <- colnames(party_moves_2014_2018[[2]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "TOP 09 (2014)"
par(mar=c(3,9,2,1))
x <- barplot(data_bar+0.01, ylim = c(0,14), horiz = TRUE, las = 2, axes = F, width = 1.4)
axis(side = 1, at=seq(0,110,20),labels = as.character(seq(0,100,20)), las = 1)
labeling <- function(){y <- c(0,data_bar[data_bar[,1]>=3,1])
for (i in length(y):2) {
  y[i] <- y[i]/2 + sum(y[1:i-1])}
text(y[2:length(y)],x+0.2, labels = paste0(substr(rownames(data_bar)[data_bar[,1]>=3],1,nchar(rownames(data_bar)[data_bar[,1]>=3])-7)), 
     pos = 1, cex = 0.9, col = "white")
text(y[2:length(y)],x+0.2, labels = paste0(data_bar[data_bar>=3,1]," %"), 
     cex = 0.9, col = "white")}
labeling()
data_bar <- t(as.matrix(party_moves_2014_2018[[2]][c("ANO (2014)"),][-18],16,1))
rownames(data_bar) <- colnames(party_moves_2014_2018[[2]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "ANO (2014)"
x <- barplot(data_bar, space = 1.5, add = T, horiz = T, las = 2, axes = F, width = 1.4)
labeling()
data_bar <- t(as.matrix(party_moves_2010_2014[[2]][c("VV (2010)"),][-18],16,1))
rownames(data_bar) <- colnames(party_moves_2010_2014[[2]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "VV (2010)"
x <- barplot(data_bar, space = 2.8, add = T, horiz = T, las = 2, axes = F, width = 1.4)
labeling()
data_bar <- t(as.matrix(party_moves_2006_2010[[2]][c("KDU-CSL (2006)"),][-18],16,1))
rownames(data_bar) <- colnames(party_moves_2006_2010[[2]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "KDU-CSL (2006)"
x <- barplot(data_bar, space = 4.1, add = T, horiz = T, las = 2, axes = F, width = 1.4)
labeling()
data_bar <- t(as.matrix(party_moves_2002_2006[[2]][c("US-DEU (2002)"),][-18],16,1))
rownames(data_bar) <- colnames(party_moves_2002_2006[[2]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "US-DEU (2002)"
x <- barplot(data_bar, space = 5.4, add = T, horiz = T, las = 2, axes = F, width = 1.4)
labeling()
data_bar <- t(as.matrix(party_moves_1998_2002[[2]][c("SPR-RSC (1998)"),][-18],16,1))
rownames(data_bar) <- colnames(party_moves_1998_2002[[2]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "SPR-RSC (1998)"
x <- barplot(data_bar, space = 6.7, add = T, horiz = T, las = 2, axes = F, width = 1.4)
labeling()
data_bar <- t(as.matrix(party_moves_1998_2002[[2]][c("ODA (1998)"),][-18],16,1))
rownames(data_bar) <- colnames(party_moves_1998_2002[[2]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "ODA (1998)"
x <- barplot(data_bar, space = 8, add = T, horiz = T, las = 2, axes = F, width = 1.4)
labeling()
data_bar <- t(as.matrix(party_moves_1994_1998[[2]][c("ODS (1994)"),][-18],16,1))
rownames(data_bar) <- colnames(party_moves_1994_1998[[2]])[-18]
data_bar <- as.matrix(data_bar[order(-data_bar[,1]),])
colnames(data_bar) <- "ODS (1994)"
x <- barplot(data_bar, space = 9.3, add = T, horiz = T, las = 2, axes = F, width = 1.4)
labeling()
dev.off()



##########################################
########## NATIONALIZATION ###############
##########################################

pdf("output/nationalization.pdf", width=10, height=8)
par(mfrow=c(4,4),oma = c(5,4,0,0) + 0.1,mar = c(0,0,1,1) + 0.1, family="Times")
gini_total <- matrix(NA, 16, 7)
dimnames(gini_total) <- list(c(parties_lab,"OTHERS","IND."), c("1994","1998","2002","2006","2010","2014","2018"))
for (k in 1:length(parties)) {
  party <- parties_num[k]
  gini <- gini_lower <- gini_upper <- NULL
  for (i in 1:7) {
    num <- nrow(table(kvrk_mod[[i]][kvrk_mod[[i]]$PSTRANA==party,]$NAZEVNUTSOKRES))
    d <- if (num>0) merge(as.data.frame(table(kvrk_mod[[i]][kvrk_mod[[i]]$PSTRANA==party,]$NUTSOKRES)),inhab, by.x = "Var1", by.y = "NUTSOKRES", all.y = TRUE) else as.data.frame(0)
    d$Freq <- if (num>0) ifelse(is.na(d$Freq),0,d$Freq) else 0
    d$res <- if (num>0) d$Freq/d$OBYVATEL*100 else 0
    gini[i] <- if (sum(d$res)>0) 1-ineq(d$res,type="Gini") else NaN
    ginis <- kvrk_mod[[i]][kvrk_mod[[i]]$PSTRANA==party,]$NUTSOKRES
    bootmeans <- rep(NA,1000)
    for (x in 1:1000){
      bootmean <- sample(x=ginis,size=length(ginis),replace=T)
      num <- nrow(table(bootmean))
      d <- if (num>0) merge(as.data.frame(table(bootmean)),inhab, by.x = "bootmean", by.y = "NUTSOKRES", all.y = TRUE) else as.data.frame(0)
      d$Freq <- if (num>0) ifelse(is.na(d$Freq),0,d$Freq) else 0
      d$res <- if (num>0) d$Freq/d$OBYVATEL*100 else 0
      bootmeans[x] <- if (sum(d$res)>0) 1-ineq(d$res,type="Gini") else NaN
    }
    gini_lower[i] <- if (is.nan(gini[i])) NaN else quantile(bootmeans,c(0.005))
    gini_upper[i] <- if (is.nan(gini[i])) NaN else quantile(bootmeans,c(0.995))
  }
  plot(1:7, gini, axes = FALSE, type = "b", ylim = c(0,1), 
       main = "", xlab = "", ylab = "", lwd = 2, lty = "solid", pch = 1)
  #polygon(x = c(1:7,rev(1:7))[!is.nan(c(gini_lower,rev(gini_upper)))], 
  #        y = c(gini_lower,rev(gini_upper))[!is.nan(c(gini_lower,rev(gini_upper)))], col = adjustcolor("black", alpha = 0.1), border = F)
  axis(side = 2, at=seq(0,1,0.2),
       labels = if (k %in% c(1,5,9,13)) as.character(seq(0,1,0.2)) else FALSE, las = 2, cex.axis = 1.3)
  axis(side = 1, at=1:7,
       labels = if (k %in% c(13,14)) c("1994","1998","2002","2006","2010","2014","2018") else FALSE, cex.axis = 1.3)
  box(which = "plot", bty = "l")
  grid(col = "grey70")
  text(4,0.3,parties_lab[k], cex = 1.5)
  #text(1:7, gini, labels = round(gini,2), pos = 3)
  gini_total[k,] <- round(gini,3)
}
gini <- NULL
for (i in 1:7) {
  num <- nrow(table(kvrk_mod[[i]][kvrk_mod[[i]]$PSTRANA %in% oparties_num,]$NAZEVNUTSOKRES))
  d <- if (num>0) merge(as.data.frame(table(kvrk_mod[[i]][kvrk_mod[[i]]$PSTRANA %in% oparties_num,]$NUTSOKRES)),inhab, by.x = "Var1", by.y = "NUTSOKRES", all.y = TRUE) else as.data.frame(0)
  d$Freq <- if (num>0) ifelse(is.na(d$Freq),0,d$Freq) else 0
  d$res <- if (num>0) d$Freq/d$OBYVATEL*100 else 0
  gini[i] <- if (sum(d$res)>0) 1-ineq(d$res,type="Gini") else NaN
}
plot(1:7, gini, axes = FALSE, type = "b", ylim = c(0,1), 
     main = "", xlab = "", ylab = "", lwd = 2)
axis(side = 2, at=seq(0,1,0.2), labels = FALSE, las = 2, cex.axis = 1.3)
axis(side = 1, at=1:7, labels = c("1994","1998","2002","2006","2010","2014","2018"), cex.axis = 1.3)
box(which = "plot", bty = "l")
grid(col = "grey70")
text(4,0.3, "OTHERS", cex = 1.5)
#text(1:7, gini, labels = round(gini,2), pos = 3)
gini_total[15,] <- round(gini,3)
gini <- NULL
for (i in 1:7) {
  num <- nrow(table(kvrk_mod[[i]][kvrk_mod[[i]]$PSTRANA==99,]$NAZEVNUTSOKRES))
  d <- if (num>0) merge(as.data.frame(table(kvrk_mod[[i]][kvrk_mod[[i]]$PSTRANA==99,]$NUTSOKRES)),inhab, by.x = "Var1", by.y = "NUTSOKRES", all.y = TRUE) else as.data.frame(0)
  d$Freq <- if (num>0) ifelse(is.na(d$Freq),0,d$Freq) else 0
  d$res <- if (num>0) d$Freq/d$OBYVATEL*100 else 0
  gini[i] <- if (sum(d$res)>0) 1-ineq(d$res,type="Gini") else NaN
}
plot(1:7, gini, axes = FALSE, type = "b", ylim = c(0,1), 
     main = "", xlab = "", ylab = "", lwd = 2)
axis(side = 2, at=seq(0,1,0.2), labels = FALSE, cex.axis = 1.3)
axis(side = 1, at=1:7, labels = c("1994","1998","2002","2006","2010","2014","2018"), cex.axis = 1.3)
box(which = "plot", bty = "l")
grid(col = "grey70")
text(4,0.3, "INDEPENDENTS", cex = 1.5)
#text(1:7, gini, labels = round(gini,2), pos = 3)
title(xlab = "Year", ylab = "Party nationalization score of party candidates", outer = TRUE, cex.lab = 1.5)
gini_total[16,] <- round(gini,3)
dev.off()


#################################
########## OUTPUT ###############
#################################

write.xlsx(age_total, "output/parties.xlsx", sheetName = "age", showNA = F)
write.xlsx(gender_total, "output/parties.xlsx", sheetName = "gender", showNA = F, append = T)
write.xlsx(education_total, "output/parties.xlsx", sheetName = "education", showNA = F, append = T)
write.xlsx(gini_total, "output/parties.xlsx", sheetName = "gini", showNA = F, append = T)
write.xlsx(population_total, "output/parties.xlsx", sheetName = "population", showNA = F, append = T)

write.xlsx(party_moves_1994_1998[[1]], "output/party_moves.xlsx", sheetName = "94_98_absolute", showNA = F)
write.xlsx(party_moves_1994_1998[[2]], "output/party_moves.xlsx", sheetName = "94_98_relative_exit", showNA = F, append = T)
write.xlsx(party_moves_1994_1998[[3]], "output/party_moves.xlsx", sheetName = "94_98_relative_entry", showNA = F, append = T)
write.xlsx(party_moves_1998_2002[[1]], "output/party_moves.xlsx", sheetName = "98_02_absolute", showNA = F, append = T)
write.xlsx(party_moves_1998_2002[[2]], "output/party_moves.xlsx", sheetName = "98_02_relative_exit", showNA = F, append = T)
write.xlsx(party_moves_1998_2002[[3]], "output/party_moves.xlsx", sheetName = "98_02_relative_entry", showNA = F, append = T)
write.xlsx(party_moves_2002_2006[[1]], "output/party_moves.xlsx", sheetName = "02_06_absolute", showNA = F, append = T)
write.xlsx(party_moves_2002_2006[[2]], "output/party_moves.xlsx", sheetName = "02_06_relative_exit", showNA = F, append = T)
write.xlsx(party_moves_2002_2006[[3]], "output/party_moves.xlsx", sheetName = "02_06_relative_entry", showNA = F, append = T)
write.xlsx(party_moves_2006_2010[[1]], "output/party_moves.xlsx", sheetName = "06_10_absolute", showNA = F, append = T)
write.xlsx(party_moves_2006_2010[[2]], "output/party_moves.xlsx", sheetName = "06_10_relative_exit", showNA = F, append = T)
write.xlsx(party_moves_2006_2010[[3]], "output/party_moves.xlsx", sheetName = "06_10_relative_entry", showNA = F, append = T)
write.xlsx(party_moves_2010_2014[[1]], "output/party_moves.xlsx", sheetName = "10_14_absolute", showNA = F, append = T)
write.xlsx(party_moves_2010_2014[[2]], "output/party_moves.xlsx", sheetName = "10_14_relative_exit", showNA = F, append = T)
write.xlsx(party_moves_2010_2014[[3]], "output/party_moves.xlsx", sheetName = "10_14_relative_entry", showNA = F, append = T)
write.xlsx(party_moves_2014_2018[[1]], "output/party_moves.xlsx", sheetName = "14_18_absolute", showNA = F, append = T)
write.xlsx(party_moves_2014_2018[[2]], "output/party_moves.xlsx", sheetName = "14_18_relative_exit", showNA = F, append = T)
write.xlsx(party_moves_2014_2018[[3]], "output/party_moves.xlsx", sheetName = "14_18_relative_entry", showNA = F, append = T)


prop.table(table(kvrk_1994$ZENA))
prop.table(table(kvrk_1998$ZENA))
prop.table(table(kvrk_2002$ZENA))
prop.table(table(kvrk_2006$ZENA))
prop.table(table(kvrk_2010$ZENA))
prop.table(table(kvrk_2014$ZENA))
prop.table(table(kvrk_2018$ZENA))


prop.table(table(kvrk_1994$VS_1994))
prop.table(table(kvrk_1998$VS))
prop.table(table(kvrk_2002$VS))
prop.table(table(kvrk_2006$VS))
prop.table(table(kvrk_2010$VS))
prop.table(table(kvrk_2014$VS))
prop.table(table(kvrk_2018$VS_2018))





